For your handwritten solutions, scan them or take a picture of them.
For your code, only .ipynb file will be graded.
Please compress all the files to make a single .zip file
Do not submit a printed version of your code. It will not be graded.
An AutoEncoder (AE) is a neural network model that learns dimension reduction. The features compressed by the encoder are suited to reconstruction rather than to solving a downstream task. Therefore, when we reconstruct data from a feature in the latent space, we can find unwanted entanglement that hinders distinguishing one class from another. However, this entanglement also gives us an advantage, as follows. Discrete inputs produce discrete features in the latent space, and those discrete features generate discrete outputs as well. But thanks to the entanglement, we can also generate data from unseen points in the latent space, so the decoder becomes a model that generates data from a continuous distribution over the latent space. Thus, data generation is one of the main purposes of AEs, and there are famous generative variants such as the Variational AutoEncoder and the Adversarial AutoEncoder.
The Conditional AutoEncoder (CAE) is a modified AE that focuses on data generation. It determines in advance which class to generate, so it can produce data of a specific class. This requires additional label information, which makes it less attractive purely as a dimension-reduction method.
You will build and test a CAE through the following cells:
(0) import modules
import warnings
warnings.filterwarnings('ignore')
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
%matplotlib inline
(1) load MNIST data
# your code here
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
train_imgs =
train_labels =
test_imgs =
test_labels =
n_train =
n_test =
print ("The number of training images : {}, shape : {}".format(n_train, train_imgs.shape))
print ("The number of testing images : {}, shape : {}".format(n_test, test_imgs.shape))
# Solution: download MNIST (one-hot labels) and keep the image/label arrays
# plus the dataset sizes used by the batch makers below.
from tensorflow.examples.tutorials.mnist import input_data

mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

train_imgs, train_labels = mnist.train.images, mnist.train.labels
test_imgs, test_labels = mnist.test.images, mnist.test.labels
n_train, n_test = len(train_imgs), len(test_imgs)

print("The number of training images : {}, shape : {}".format(n_train, train_imgs.shape))
print("The number of testing images : {}, shape : {}".format(n_test, test_imgs.shape))
(2) Define CAE(Conditional AutoEncoder) structure
# your code here
n_input =
n_encoder1 =
n_encoder2 =
n_latent = 2
n_decoder2 =
n_decoder1 =
n_label = 10
# Solution: layer sizes of the conditional autoencoder.
n_input = 28 * 28            # flattened 28x28 MNIST image -> 784 pixels
n_encoder1, n_encoder2 = 500, 300
n_latent = 2                 # 2-D latent code so it can be plotted directly
n_decoder2, n_decoder1 = 300, 500
n_label = 10                 # one-hot digit label appended as the condition
(3) Construct the CAE model. Unlike an AE, which is trained in an unsupervised way, a CAE needs label data and is trained in a supervised manner.
Note the figure above. Feed in both the image and the label data using tf.concat([input, label]).
x = tf.placeholder(tf.float32, [None, n_input]) # image
y = tf.placeholder(tf.float32, [None, n_label]) # label
# your code here
weights = {
}
biases = {
}
def batch_norm_flat(batch_flat) :
epsilon = 1e-5
beta = tf.Variable(tf.constant(0.0, shape=[1]), trainable=True)
gamma = tf.Variable(tf.constant(1.0, shape=[1]), trainable=True)
mean, variance = tf.nn.moments(batch_flat, axes=[0])
return norm_batch
def encoder(x, weights, biases, label):
return latent
def decoder(latent, weights, biases, label):
return reconst
x = tf.placeholder(tf.float32, [None, n_input])
y = tf.placeholder(tf.float32, [None, n_label])
# solution
# Encoder weights map the concatenated [image ‖ label] vector (n_input+n_label)
# down to the 2-D latent; decoder weights map [latent ‖ label] back up to the
# image size. All parameters use small-stddev Gaussian initialization.
weights = {
    'encoder1' : tf.Variable(tf.random_normal([n_input+n_label, n_encoder1], stddev = 0.1)),
    'encoder2' : tf.Variable(tf.random_normal([n_encoder1, n_encoder2], stddev = 0.1)),
    'latent' : tf.Variable(tf.random_normal([n_encoder2, n_latent], stddev = 0.1)),
    'decoder2' : tf.Variable(tf.random_normal([n_latent+n_label, n_decoder2], stddev = 0.1)),
    'decoder1' : tf.Variable(tf.random_normal([n_decoder2, n_decoder1], stddev = 0.1)),
    'reconst' : tf.Variable(tf.random_normal([n_decoder1, n_input], stddev = 0.1))
}
# One bias vector per layer, matching the output width of each weight matrix.
biases = {
    'encoder1' : tf.Variable(tf.random_normal([n_encoder1], stddev = 0.1)),
    'encoder2' : tf.Variable(tf.random_normal([n_encoder2], stddev = 0.1)),
    'latent' : tf.Variable(tf.random_normal([n_latent], stddev = 0.1)),
    'decoder2' : tf.Variable(tf.random_normal([n_decoder2], stddev = 0.1)),
    'decoder1' : tf.Variable(tf.random_normal([n_decoder1], stddev = 0.1)),
    'reconst' : tf.Variable(tf.random_normal([n_input], stddev = 0.1))
}
def batch_norm_flat(batch_flat):
    """Batch-normalize a flat (rank-2) activation tensor over the batch axis.

    Creates trainable shift (beta) and scale (gamma) variables and normalizes
    with the current batch's mean/variance. NOTE(review): this always uses
    batch statistics (no moving averages), so it is train-mode only.
    """
    epsilon = 1e-5  # numerical-stability constant added to the variance
    beta = tf.Variable(tf.constant(0.0, shape=[1]), trainable=True)
    gamma = tf.Variable(tf.constant(1.0, shape=[1]), trainable=True)
    mean, variance = tf.nn.moments(batch_flat, axes=[0])
    # Bug fix: the original passed an undefined name `batch_image` here,
    # which raises NameError when the function is called. It must normalize
    # the function's own argument `batch_flat`.
    norm_batch = tf.nn.batch_normalization(batch_flat, mean, variance, beta, gamma, epsilon)
    return norm_batch
def encoder(x, weights, biases, label):
    """Map an image, conditioned on its one-hot label, to a 2-D latent code.

    The label is concatenated onto the flattened image before the first
    fully-connected layer; hidden layers use tanh, the latent layer is linear.
    """
    conditioned = tf.concat([x, label], 1)
    h1 = tf.nn.tanh(tf.matmul(conditioned, weights['encoder1']) + biases['encoder1'])
    h2 = tf.nn.tanh(tf.matmul(h1, weights['encoder2']) + biases['encoder2'])
    latent = tf.matmul(h2, weights['latent']) + biases['latent']
    return latent
def decoder(latent, weights, biases, label):
    """Reconstruct an image from a latent code conditioned on a one-hot label.

    Mirrors the encoder: the label is concatenated onto the latent code,
    hidden layers use tanh, and the output (pixel) layer is linear.
    """
    conditioned = tf.concat([latent, label], 1)
    h2 = tf.nn.tanh(tf.matmul(conditioned, weights['decoder2']) + biases['decoder2'])
    h1 = tf.nn.tanh(tf.matmul(h2, weights['decoder1']) + biases['decoder1'])
    reconst = tf.matmul(h1, weights['reconst']) + biases['reconst']
    return reconst
(4) initialize the model & optimizer.
latent = encoder(x, weights, biases, y)
reconst = decoder(latent, weights, biases, y)
# your code here
loss =
LR =
optm =
latent = encoder(x, weights, biases, y)        # 2-D code for each input image
reconst = decoder(latent, weights, biases, y)  # reconstruction from code + label
# solution
# Mean-squared reconstruction error, minimized with Adam.
loss = tf.reduce_mean(tf.square(x - reconst))
LR = 0.0001
optm = tf.train.AdamOptimizer(LR).minimize(loss)
(5) Train the model and plot the loss
# Training hyper-parameters.
n_batch = 50   # mini-batch size
n_iter = 5000  # number of optimizer steps
n_prt = 250    # logging / loss-recording interval

def train_batch_maker(batch_size):
    # Sample a random mini-batch (with replacement) from the training set.
    random_idx = np.random.randint(n_train, size = batch_size)
    return train_imgs[random_idx], train_labels[random_idx]

def test_batch_maker(batch_size):
    # Sample a random mini-batch (with replacement) from the test set.
    random_idx = np.random.randint(n_test, size = batch_size)
    return test_imgs[random_idx], test_labels[random_idx]

sess = tf.Session()
# your code here
plt.figure(figsize=(10,8))
plt.plot(np.arange(len(loss_record_train))*n_prt, loss_record_train, label = 'training')
plt.plot(np.arange(len(loss_record_test))*n_prt, loss_record_test, label = 'testing')
plt.xlabel('iteration', fontsize = 15)
plt.ylabel('loss', fontsize = 15)
plt.legend(fontsize = 12)
plt.ylim([0,np.max(loss_record_train)])
plt.show()
n_batch = 50
n_iter = 5000
n_prt = 250
def train_batch_maker(batch_size):
    """Return a random (images, labels) mini-batch from the training set."""
    picks = np.random.randint(n_train, size = batch_size)
    return train_imgs[picks], train_labels[picks]
def test_batch_maker(batch_size):
    """Return a random (images, labels) mini-batch from the test set."""
    picks = np.random.randint(n_test, size = batch_size)
    return test_imgs[picks], test_labels[picks]
sess = tf.Session()
# solution
# Initialize all graph variables, then run n_iter Adam steps; every n_prt
# steps record the loss on the current training batch and a fresh test batch.
sess.run(tf.global_variables_initializer())

loss_record_train = []
loss_record_test = []
for step in range(n_iter + 1):
    batch_x, batch_y = train_batch_maker(n_batch)
    sess.run(optm, feed_dict = {x: batch_x, y: batch_y})
    if step % n_prt == 0:
        eval_x, eval_y = test_batch_maker(n_batch)
        train_loss = sess.run(loss, feed_dict = {x: batch_x, y: batch_y})
        test_loss = sess.run(loss, feed_dict = {x: eval_x, y: eval_y})
        loss_record_train.append(train_loss)
        loss_record_test.append(test_loss)
        print ("({:4d}/{}) loss: {}".format(step, n_iter, train_loss))
# Plot the training/testing loss curves; the x-axis is the iteration at which
# each loss value was recorded (one sample every n_prt steps).
plt.figure(figsize=(10,8))
plt.plot(np.arange(len(loss_record_train))*n_prt, loss_record_train, label = 'training')
plt.plot(np.arange(len(loss_record_test))*n_prt, loss_record_test, label = 'testing')
plt.xlabel('iteration', fontsize = 15)
plt.ylabel('loss', fontsize = 15)
plt.legend(fontsize = 12)
plt.ylim([0,np.max(loss_record_train)])
plt.show()
(6) Reconstruct 10 random images and compare it with its original.
# your code here
# solution
# Reconstruct 10 random test images: odd subplot positions show the input,
# even positions show the CAE reconstruction (5 rows x 4 columns = 10 pairs).
plt.figure(figsize = (10, 12))
for i in range(10):
    test_x, test_y = test_batch_maker(1)
    x_reconst = sess.run(reconst, feed_dict = {x: test_x, y:test_y})
    plt.subplot(5,4, 2*i+1)
    plt.imshow(test_x.reshape(28,28), 'gray')
    plt.title('Input Image') ;plt.axis('off')
    plt.subplot(5,4, 2*i+2)
    plt.imshow(x_reconst.reshape(28,28), 'gray')
    plt.title('Reconstructed Image'); plt.axis('off')
plt.show()
(7) Plot the latent space.
test_x, test_y = test_batch_maker(500)
# your code here
# Encode 500 random test images and scatter their 2-D latent codes by digit.
test_x, test_y = test_batch_maker(500)
# solution
test_y_arg = np.argmax(test_y, axis = 1)  # one-hot -> integer class labels
test_latent = sess.run(latent, feed_dict = {x: test_x, y: test_y})
# Axis limits are reused by the later per-digit / generation plots.
xmin, xmax = np.min(test_latent[:,0]), np.max(test_latent[:,0])
ymin, ymax = np.min(test_latent[:,1]), np.max(test_latent[:,1])
plt.figure(figsize = (10,10))
for i in range(10):
    # marker=i selects matplotlib's integer tick-marker styles, one per digit
    plt.scatter(test_latent[test_y_arg == i,0], test_latent[test_y_arg == i,1],
                label = str(i), marker=i)
plt.title('Latent Space', fontsize=15)
plt.xlabel('Z1', fontsize=15)
plt.ylabel('Z2', fontsize=15)
plt.legend(fontsize = 15)
plt.xlim([xmin, xmax]); plt.ylim([ymin, ymax])
plt.show()
(8) Plot the latent space onto separate plot for each digit to see the each distribution.
# your code here
# solution
# One subplot per digit, all on the same axis limits, so each class's latent
# distribution can be compared directly against the combined scatter plot.
plt.figure(figsize = (18, 24))
plt.suptitle('Latent Space (for each digit)', fontsize=20)
for i in range(10):
    plt.subplot(4, 3, i+1)
    # Index the default color cycle so per-digit colors match the combined plot.
    plt.scatter(test_latent[test_y_arg == i,0], test_latent[test_y_arg == i,1],
                label = str(i), marker=i, color=(plt.rcParams['axes.prop_cycle'].by_key()['color'])[i])
    plt.title(i, fontsize=15)
    plt.xlim([xmin, xmax]); plt.ylim([ymin, ymax])
    plt.xlabel('Z1', fontsize=15)
    plt.ylabel('Z2', fontsize=15)
plt.show()
(9) Generate images of each digit by feeding a new point and different label conditions to the decoder. Discuss whether those are in a similar style.
new_x, new_y = np.random.uniform(xmin, xmax), np.random.uniform(ymin, ymax)
new_data = # your code here
print('new_data:', new_data)
fig = plt.figure(figsize = (10,10))
for i in range(10):
plt.scatter(test_latent[test_y_arg == i,0], test_latent[test_y_arg == i,1],
label = str(i), marker=i)
plt.axvline(x=new_data[0,0], c='r', alpha=0.2)
plt.axhline(y=new_data[0,1], c='r', alpha=0.2)
plt.scatter(new_data[0,0], new_data[0,1], marker='x', c='k', s = 100, label = 'new data')
plt.title('Latent Space', fontsize=15)
plt.xlabel('Z1', fontsize=15)
plt.ylabel('Z2', fontsize=15)
plt.legend(fontsize = 15)
plt.xlim([xmin, xmax]); plt.ylim([ymin, ymax])
plt.show()
latent_input = tf.placeholder(tf.float32, [None, n_latent])
reconst = decoder(latent_input, weights, biases, y)
plt.figure(figsize=(18, 8))
for i in range(10):
feed = {latent_input: new_data, y: np.eye(10, dtype=np.float32)[i:i+1]}
fake_image = sess.run(reconst, feed_dict = feed)
plt.subplot(2, 5, i+1)
plt.imshow(fake_image.reshape(28,28), 'gray')
plt.xticks([])
plt.yticks([])
plt.tight_layout()
plt.show()
# solution
# NOTE(review): new_x/new_y are sampled but never used — the latent point is
# hard-coded to (-4, 3) so the generated figures are reproducible across runs.
new_x, new_y = np.random.uniform(xmin, xmax), np.random.uniform(ymin, ymax)
new_data = np.array([[-4, 3]])
print('new_data:', new_data)
# Show where the chosen latent point lies relative to the encoded test digits.
fig = plt.figure(figsize = (10,10))
for i in range(10):
    plt.scatter(test_latent[test_y_arg == i,0], test_latent[test_y_arg == i,1],
                label = str(i), marker=i)
plt.axvline(x=new_data[0,0], c='r', alpha=0.2)
plt.axhline(y=new_data[0,1], c='r', alpha=0.2)
plt.scatter(new_data[0,0], new_data[0,1], marker='x', c='k', s = 100, label = 'new data')
plt.title('Latent Space', fontsize=15)
plt.xlabel('Z1', fontsize=15)
plt.ylabel('Z2', fontsize=15)
plt.legend(fontsize = 15)
plt.xlim([xmin, xmax]); plt.ylim([ymin, ymax])
plt.show()
# Rebuild the decoder head on a latent placeholder so latent points can be fed
# directly. This rebinds the name `reconst`; the same weight/bias Variables
# are reused (decoder only reads the dicts, creating no new parameters).
latent_input = tf.placeholder(tf.float32, [None, n_latent])
reconst = decoder(latent_input, weights, biases, y)
# Decode the same latent point under each of the 10 one-hot label conditions.
plt.figure(figsize=(18, 8))
for i in range(10):
    feed = {latent_input: new_data, y: np.eye(10, dtype=np.float32)[i:i+1]}
    fake_image = sess.run(reconst, feed_dict = feed)
    plt.subplot(2, 5, i+1)
    plt.imshow(fake_image.reshape(28,28), 'gray')
    plt.xticks([])
    plt.yticks([])
plt.tight_layout()
plt.show()
(10) Draw the manifold image with latent features for all digits and discuss the result based on what you found in (9).
nx = 20
ny = 20
x_values = np.linspace(xmin, xmax, nx)
y_values = np.linspace(ymin, ymax, ny)
canvas = np.empty((28*ny, 28*nx))
for k in range(10):
plt.figure(figsize = (16, 8))
plt.subplot(1,2,1)
plt.scatter(test_latent[test_y_arg == k,0], test_latent[test_y_arg == k,1],
label = str(k), marker=k, color=(plt.rcParams['axes.prop_cycle'].by_key()['color'])[k])
plt.title('Latent Space', fontsize = 15)
plt.xlabel('Z1', fontsize = 15)
plt.ylabel('Z2', fontsize = 15)
plt.legend(fontsize = 12)
plt.xlim([-6, 6]); plt.ylim([-6, 6])
plt.subplot(1,2,2)
# your code here
plt.imshow(canvas, 'gray')
plt.title('Manifold', fontsize = 15)
plt.xlabel('Z1', fontsize = 15)
plt.ylabel('Z2', fontsize = 15)
plt.xticks([])
plt.yticks([])
plt.tight_layout()
plt.show()
# Initialize canvas
nx = 20   # grid resolution along Z1 (columns)
ny = 20   # grid resolution along Z2 (rows)
x_values = np.linspace(xmin, xmax, nx)
y_values = np.linspace(ymin, ymax, ny)
canvas = np.empty((28*ny, 28*nx))
# For each digit k: left panel scatters that digit's latent codes, right panel
# decodes every grid point of the latent plane with the label fixed to k.
for k in range(10):
    plt.figure(figsize = (16, 8))
    plt.subplot(1,2,1)
    plt.scatter(test_latent[test_y_arg == k,0], test_latent[test_y_arg == k,1],
                label = str(k), marker=k, color=(plt.rcParams['axes.prop_cycle'].by_key()['color'])[k])
    plt.title('Latent Space', fontsize = 15)
    plt.xlabel('Z1', fontsize = 15)
    plt.ylabel('Z2', fontsize = 15)
    plt.legend(fontsize = 12)
    plt.xlim([-6, 6]); plt.ylim([-6, 6])
    plt.subplot(1,2,2)
    # solution
    for i, yi in enumerate(y_values):
        for j, xi in enumerate(x_values):
            latent_ = np.array([[xi, yi]])
            feed = {latent_input: latent_, y: np.eye(10, dtype=np.float32)[k:k+1]}
            fake_image = sess.run(reconst, feed_dict = feed)
            # Bug fix: the row offset must use ny (the y-grid size), not nx;
            # the original only worked because nx happened to equal ny.
            # Row 0 of the canvas holds the largest yi (image origin is top-left).
            canvas[(ny-i-1)*28:(ny-i)*28, j*28:(j+1)*28] = fake_image.reshape(28, 28)
    plt.imshow(canvas, 'gray')
    plt.title('Manifold', fontsize = 15)
    plt.xlabel('Z1', fontsize = 15)
    plt.ylabel('Z2', fontsize = 15)
    plt.xticks([])
    plt.yticks([])
    plt.tight_layout()
    plt.show()